Opg. 1

Gradienten er

\(\begin{bmatrix} \frac{1}{n} \sum_i^n 2(a + bs_i - d_i) \\ \frac{1}{n} \sum_i^n 2(a + bs_i -d_i)s_i \end{bmatrix}\)

ab <- c(-20,3)
n <- 50
ms <- function(ab) {
  ab[1] + ab[2] * cars$speed
}

f_i <- function(ab) {
  (ms(ab) - cars$dist)^2
}

f <- function(ab) {
  1/n * sum(f_i(ab))
}

d_f <- function(ab) {
  grad(f,ab)
}

dd_f <- function(ab) {
  hessian(f,ab)
}

backtrack <- function(a_bar = 3, rho = 0.3, c = 0.2, func = f, Dfunc = d_f, x_k = ab, c_2 = 0.5) {
  a <- a_bar
  itt <- 0
  while ( (func(x_k + a * (-Dfunc(x_k)) ) ) > (func(x_k) + c * a * t(Dfunc(x_k)) %*% (-Dfunc(x_k))) ) {
  # while (t(Dfunc(x_k + a * -Dfunc(x_k))) %*% -Dfunc(x_k) < c_2 * t(Dfunc(x_k)) %*% -Dfunc(x_k) ) {
    itt <- itt + 1
    a <- rho * a
  # }
  }
  a
}
#steepest descent
optimer_identitet <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k))) > 1e-5) {
    itt <- 1 + itt
    p_k <- -d_f(x_k)
    a_k <- backtrack(a_k, rho = 0.5, c = 0.001, f, d_f, x_k)
    x_k <- x_k + a_k * t(p_k)
    steps[itt] <- a_k
    if (itt %% 500 == 0) {
      abline(x_k)
    }
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet()
#newton
optimer_newton <- function(x_0 = ab, output = TRUE) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  x_plot <- c()
  y_plot <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k))) > 1e-4) {
    itt <- 1 + itt
    p_k <- solve(dd_f(x_k), -d_f(x_k))
    a_k <- backtrack(a_k, rho = 0.5, c = 0.01, f, d_f, x_k)
    x_k <- x_k + a_k * t(p_k)
    if (itt %% 50 == 0) {
      abline(x_k)
      x_plot[itt/50] <- x_k[1]
      y_plot[itt/50] <- x_k[2]
    }
    steps[itt] <- a_k
    if (itt == 100000) {
      break
    }
  }
  if (output == TRUE) {
    cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
  }
  else
  cbind(x_plot, y_plot)
}
xy <- optimer_newton(output = FALSE)
optimer_newton(output = TRUE)

## x* =  -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps =  0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
ms_plot <- function(x,y) {
  x + y * cars$speed
}

f_i_plot <- function(x,y) {
  (ms_plot(x,y) - cars$dist)^2
}

f_plot <- function(x,y) {
  1/n * sum(f_i_plot(x,y))
}


fkts_vaerdier <- apply(xy, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy[,1]
y <- xy[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface()  %>% 
  add_trace(x = xy[,1], y = xy[,2], z = fkts_vaerdier)
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
summary(lm(cars$dist ~ cars$speed))
## 
## Call:
## lm(formula = cars$dist ~ cars$speed)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## cars$speed    3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
optim(ab, f, hessian = TRUE)
## $par
## [1] -17.583175   3.932699
## 
## $value
## [1] 227.0704
## 
## $counts
## function gradient 
##       53       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##      [,1]   [,2]
## [1,]  2.0  30.80
## [2,] 30.8 529.12
phi_lig_nul <- function(x_k) {
  1/n * sum((x_k[2] * cars$speed + x_k[1] - cars$dist) /(d_f(x_k)[2] * cars$speed + d_f(x_k)[1]))
}

optimer_identitet_phi <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k))) > 1e-5) {
    itt <- 1 + itt
    p_k <- -d_f(x_k)
    a_k <- phi_lig_nul(x_k)
    x_k <- x_k + a_k * t(p_k)
    steps[itt] <- a_k
    if (itt %% 1 == 0) {
      abline(x_k)
    }
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_phi()


optimer_newton_phi <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  x_plot <- c()
  f_plot <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k))) > 1e-5) {
    itt <- 1 + itt
    p_k <- solve(dd_f(x_k), -d_f(x_k))
    a_k <- phi_lig_nul(x_k)
    x_k <- x_k + a_k * t(p_k)
    if (itt %% 50 == 0) {
      abline(x_k)
    }
    steps[itt] <- a_k
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
optimer_newton_phi()

## x* =  -17.45905 3.978643 f(x*) = 227.8213 iteration = 1e+05 steps =  0.002058539 0.00205874 0.002058941 0.002059142 0.002059344